
* "Ticking all the boxes"
* Eelco Harteveld
* Electoral Studies (2021)
* ========================

* Replication syntax for CSES

************
*** Data ***
************

* Working directory that holds the CSES files (machine-specific path).
cd "C:\Users\ehartev1\Dropbox\From disagree to disapprove\Various data\CSES"

* CSES data files
* Available at https://cses.org/
* Required are: IMD plus the individual wave files
use "cses_imd.dta", clear

* Copy the IMD identifier IMD1005 under the key name that each module file
* is merged on (A1005, B1005, C1005, D1005).
foreach mod in A B C D {
	gen `mod'1005 = IMD1005
}

* Add the selected variables of Modules 1-4; every merge stores its result
* code in CSES1 ... CSES4.
merge 1:1 A1005 using "cses1.dta", gen(CSES1) ///
	keepusing(A5005_A-A5006_F A2023-A2025 A2021 A2019)
merge 1:1 B1005 using "cses2.dta", gen(CSES2) ///
	keepusing(B5001_A-B5002_I B3047_1-B3047_3 B2029 B2027)
merge 1:1 C1005 using "cses3.dta", gen(CSES3) ///
	keepusing(C5001_A-C5002_I C3036_1-C3036_3 C2029 C2027)
merge 1:1 D1005 using "cses4.dta", gen(CSES4) ///
	keepusing(D5001_A-D5002_I D3025_1_A-D3025_4_A_PT D2030 D2028)

* Turn the merge codes into flags: 1 (master only) becomes 0 and
* 3 (matched) becomes 1; other codes are left as they are.
recode CSES* (1=0) (3=1)

* Append CSES Module 5; CSES5 marks the appended observations.
append using "cses5.dta", gen(CSES5) ///
	keep(E1006_NAM E1004 E1008 E3017_A-E3017_I E3019_A-E3019_I E3020 ///
	E5002_A-E5002_I E2003 E2001_Y E2013 E2022 E2002 E2010 E3024_3 E2016 E2020)

* Carry the Module 5 polity name and election year over to the IMD variables.
replace IMD1006_NAM = E1006_NAM if CSES5==1
replace IMD1008_YEAR = E1008 if CSES5==1

* Cases: one identifier per country-year combination.
gen year = IMD1008_YEAR
encode IMD1006_NAM, gen(country)
egen case = group(country year)

* Merge CMP data
* File created using the DO-file "Prepare CMP"
* The encoded country values are translated into the country codes used as
* merge key in cmp_recoded.dta; countries without a code are set to missing.
* FIX: value 40 used to be mapped to 181, the same code as value 46, although
* the region recode further down treats 40 as Asia and 46 as "Other". With a
* shared code, country 40 would receive the CMP rows of country 46 whenever
* their years coincide. Value 40 now maps to 113.
* NOTE(review): 113 is the Manifesto Project code for South Korea, which is
* presumably what value 40 stands for (alphabetical encode) - confirm with
* "label list country" and against the codes stored in cmp_recoded.dta.
recode country (1=75) (2=.) (3=63) (4=42) (5=78) (6=21) (7=.) (8=80) (9=62) (10=.) ///
	(11=81) (12=82) (13=13) (14=83) (15=14) (16=31) (17=41) (18=51) (19=34) (20=.) ///
	(21=86) (22=15) (23=53) (24=72) (25=32) (26=71) (27=.) (28=.) (29=87) (30=88) ///
	(31=171) (32=91) (33=22) (34=64) (35=12) (36=.) (37=.) (38=92) (39=35) (40=113) ///
	(41=93) (42=94) (43=95) (44=96) (45=97) (46=181) (47=33) (48=11) (49=43) (50=.) ///
	(51=.) (52=74) (53=98) (54=61) (55=.), gen(country_CMP)

* Temporarily let the CMP code carry the name "country" so that it can serve
* as merge key, then restore the original names.
rename (country country_CMP) (country_tmp country)
merge m:1 country year using "cmp_recoded.dta", gen(merge_CMP)
rename (country country_tmp) (country_CMP country)

* Restrict to elections from 1996 onwards.
keep if year>=1996


*************************
*** Control variables ***
*************************

* World region of each encoded country value; the values in the last rule
* are set to missing.
recode country ///
	(4 6 13 15 16 17 18 22 23 33 35 48 49 = 0 "Western Europe") ///
	(19 25 39 47 = 1 "Southern Europe") ///
	(1 8 11 12 14 21 29 30 32 38 41 44 45 = 2 "East-Central Europe") ///
	(9 54 = 3 "North America") ///
	(2 7 10 31 36 55 = 4 "Latin America") ///
	(26 40 50 = 5 "Asia") ///
	(3 34 = 6 "Oceania") ///
	(24 46 = 7 "Other") ///
	(5 20 27 28 37 42 43 51 52 53 = .) ///
	, gen(region7)

* SES
* Pattern used throughout: build the variable from the IMD item, build the
* Module 5 counterpart (suffix _w5), then overwrite the Module 5 rows.

* Left-right self-placement (codes 11-99 set to missing) and a 5-category version
recode IMD3006 (11/99=.), gen(leftright)
recode E3020 (11/99=.), gen(leftright_w5)
replace leftright = leftright_w5 if CSES5==1
recode leftright (0/2=1 "Very left") (3/4=2 "Moderate left") (5=3 "Center") (6/7=4 "Moderate right") (8/10=5 "Very right"), gen(lr5)

* Satisfaction with democracy: reorder codes 4-6, drop 7-9, then reverse
recode IMD3010 (4=5) (5=6) (6=4) (7/9=.), gen(satdem_rev)
gen satdem = 7-satdem_rev

* Education in three and in five categories
recode IMD2003 (0/1=1 "Primary") (2=2 "Secondary") (3/4=3 "Post-secondary") (6/9=.), gen(edu3)
recode E2003 (0/2=1 "Primary") (4=2 "Secondary") (5/9=3 "Post-secondary") (10/99=.), gen(edu3_w5)
replace edu3 = edu3_w5 if CSES5==1
recode IMD2003 (6/9=.), gen(edu5)
recode E2003 (1=0) (2/3=1) (4=2) (5/6=3) (7/9=4) (10/99=.), gen(edu5_w5)
replace edu5 = edu5_w5 if CSES5==1

* Age; for Module 5 it is computed as election year minus year of birth
recode IMD2001_1 (9997/9999=.), gen(age)
recode E2001_Y (9997/9999=.), gen(ybirth_w5)
gen age_w5 = year-ybirth_w5
replace age = age_w5 if CSES5==1
* FIX: the middle category covers ages 35 to 54 but was labelled "35-45".
recode age (0/34=1 "Under 35") (35/54=2 "35-54") (55/200=3 "55+"), gen(age3)
recode IMD2001_2 (7/9999=.), gen(age6)
recode age_w5 (0/24=1) (25/34=2) (35/44=3) (45/54=4) (55/64=5) (65/999=6) (9997/9999=.), gen(age6_w5)
replace age6 = age6_w5 if CSES5==1
* Religion
* reli keeps the IMD coding (96-99 missing; 5, 8 and 9 collapsed into 14).
recode IMD2005 (96/99=.) (5 8 9=14), gen(reli)
* reli6 is the harmonised six-category version, filled for Module 5 as well.
recode IMD2005 (1=1 "Catholic") (2=2 "Protestant") (3 4=3 "Orthodox or other Christian") (6 7=4 "Islam") (12 97 98=5 "Atheist or DK") (99=.) (nonmissing=6 "Other"), gen(reli6)
recode E2013 (1101=1) (1200/1501=2) (1600/1699=3) (3000=4) (8200 8300 9998=5) (9001/9600 9997 9999=.) (nonmissing=6), gen(reli6_w5)
replace reli6 = reli6_w5 if CSES5==1

* Majority religion: the modal reli6 category within each country-year.
bys case: egen mode_reli = mode(reli6)
* FIX: the mode is computed on reli6, so membership has to be tested on reli6
* too. The previous comparison used reli, which follows a different coding
* and is never filled for the Module 5 rows, so those respondents (and anyone
* whose reli code differs from the reli6 code) were coded as non-majority.
gen reli_maj=.
replace reli_maj = 1 if reli6==mode_reli & reli6!=.
replace reli_maj = 0 if reli6!=mode_reli & reli6!=.

* Two groups: reli6 categories 1-4 versus 5-6
recode reli6 (1/4=1) (5/6=2), gen(reli2)
* Urbanity, sex and income: IMD item first, Module 5 item second, then the
* Module 5 rows are overwritten.
recode IMD2007 (7/9=.), generate(urb)
recode E2022 (7/9=.), generate(urb_w5)
replace urb = urb_w5 if CSES5==1

recode IMD2002 (3/9=.) (2=0), generate(male)
recode E2002 (3/9=.) (2=0), generate(male_w5)
replace male = male_w5 if CSES5==1

recode IMD2006 (6/9=.), generate(income)
recode E2010 (6/9=.), generate(income_w5)
replace income = income_w5 if CSES5==1

* Ethnicity: start from the Module 1 item and overwrite with the item of
* Modules 2-5 for the respondents flagged as belonging to that module.
generate ethn = A2021
local m = 2
foreach item in B2029 C2029 D2030 E2016 {
	replace ethn = `item' if CSES`m'==1
	local ++m
}
recode ethn (996/999=.)

* Majority ethnicity: 1 when the respondent is in the modal category of the
* country-year, 0 otherwise, missing when ethnicity is missing.
bysort case: egen mode_ethn = mode(ethn)
generate ethn_maj = (ethn==mode_ethn) if ethn!=.

* Area: same module-by-module construction as ethnicity.
generate area = A2019
local m = 2
foreach item in B2027 C2027 D2028 E2020 {
	replace area = `item' if CSES`m'==1
	local ++m
}
recode area (99=.)

* Vote & PID
* vote starts as IMD3002_LH_PL; where that is missing or above 9999993 the
* value of IMD3002_LH_DC is used. Codes 9999988-9999999 are then dropped.
rename IMD3002_LH_PL vote
replace vote = IMD3002_LH_DC if vote>9999993 | vote==.
recode vote (9999988/9999999=.)
recode IMD3005_1 (1=1 "Partisans") (5=0 "Non-partisans") (7/9=.), generate(partisan)

* Party sympathy
* IMD scores for parties A-I; codes 11-99 are treated as missing.
recode IMD3008_A-IMD3008_I (11/99=.)
rename (IMD3008_A-IMD3008_I) (sympA sympB sympC sympD sympE sympF sympG sympH sympI)

* CSES5: take the scores from E3017_A-E3017_I instead.
recode E3017_A-E3017_I (11/999=.)
foreach p in A B C D E F G H I {
	replace symp`p' = E3017_`p' if CSES5==1
}

* ans_symp*: 1 when the respondent rated the party, 0 when the rating is missing.
recode symp* (.=0) (nonmissing=1), generate(ans_sympA ans_sympB ans_sympC ans_sympD ans_sympE ans_sympF ans_sympG ans_sympH ans_sympI)

* Party size
* size_A ... size_I hold the size of parties A-I. They start from the
* Module 1 items (which exist for A-F only) and are overwritten module by
* module for the respondents flagged as belonging to that module.
foreach p in A B C D E F {
	generate size_`p' = A5005_`p'
}
foreach p in G H I {
	generate size_`p' = .
}

* Modules 2-4: items B5001_*, C5001_* and D5001_*.
local m = 2
foreach mod in B C D {
	foreach p in A B C D E F G H I {
		replace size_`p' = `mod'5001_`p' if CSES`m'==1
	}
	local ++m
}

* Module 5: items E5002_*, with values 101-999 set to missing first.
recode E5002_A-E5002_I (101/999=.)
foreach p in A B C D E F G H I {
	replace size_`p' = E5002_`p' if CSES5==1
}

* - Fractions & relative size
* Values of 100 and above are dropped, and missing sizes are then set to 0 so
* that they add nothing to the sums below. These two recodes must stay ahead
* of the creation of sizefactor, which the size* wildcard would otherwise match.
recode size* (100/999999=.)
recode size* (.=0)

* Sum of all nine party sizes, and the sum restricted to the parties the
* respondent actually rated (ans_symp* equals 1 for rated parties).
egen total_size = rowtotal(size_A-size_I)
gen rel_total_size_ = (ans_sympA*size_A + ans_sympB*size_B + ans_sympC*size_C + ans_sympD*size_D + ///
	ans_sympE*size_E + ans_sympF*size_F + ans_sympG*size_G + ans_sympH*size_H + ans_sympI*size_I)

* FIX: refer to rel_total_size_ by its exact name. The previous spelling
* without the trailing underscore only worked through variable abbreviation
* and fails under "set varabbrev off".
gen sizefactor = 100 / rel_total_size_
recode sizefactor (.=0)

* Express the sizes as proportions instead of percentages.
foreach p in A B C D E F G H I {
	replace size_`p' = (size_`p' / 100)
}

* Left-Right placement
* IMD placements of parties A-I; codes 11-99 are treated as missing.
recode IMD3007_A-IMD3007_I (11/99=.)
rename (IMD3007_A-IMD3007_I) (LR_A LR_B LR_C LR_D LR_E LR_F LR_G LR_H LR_I)

* CSES5
* FIX: the Module 5 values were copied from E3017_A-E3017_I, the very items
* this file uses for party sympathy, while E3019_A-E3019_I is loaded from
* cses5.dta but never used. The placements now come from E3019_*.
* NOTE(review): E3019_* is presumably the Module 5 left-right placement of
* parties (E3020 is used for self-placement) - confirm in the codebook.
recode E3019_A-E3019_I (11/999=.)
foreach p in A B C D E F G H I {
	replace LR_`p' = E3019_`p' if CSES5==1
}

* Vote & pid
* Party codes of 9999999 in IMD5000_* are treated as missing.
recode IMD5000_* (9999999=.)

* Module 5: translate the numeric party code in E3024_3 into the letters A-I;
* every other non-missing code becomes missing.
recode E3024_3 ///
		(040001 152001 276001 300001 348001 372001 380001 440001 499001 410001 158001 840001=1 "A") ///
		(040002 152002 276002 300002 348002 372002 380002 440002 499002 410002 158002 840002=2 "B") ///
		(040003 152003 276003 300003 348003 372003 380003 440003 499003 410003 158003 840003=3 "C") ///
		(040004 152004 276004 300004 348004 372004 380004 440004 499004 410004 158004 840004=4 "D") ///
		(040005 152005 276005 300005 348005 372005 380005 440005 499005 158005=5 "E") ///
		(040006 152006 276006 300006 348006 372006 380006 440006 499006 158006=6 "F") ///
		(152007 276007 300007 348007 372007 440007 499007 158007=7 "G") ///
		(152008 276008 300008 348008 372008 440008 499008=8 "H") ///
		(152009 499009=9 "I") (nonmissing=.), generate(pid5_num)
decode pid5_num, generate(pid_w5)

local letters A B C D E F G H I

* voted: letter of the party whose IMD5000_* code equals the vote choice.
generate voted = ""
foreach p of local letters {
	replace voted = "`p'" if vote==IMD5000_`p' & vote!=.
}
encode voted, generate(votechoice)

* pid: letter of the party whose IMD5000_* code equals IMD3005_3; the
* Module 5 rows take the letter decoded from E3024_3.
generate pid = ""
foreach p of local letters {
	replace pid = "`p'" if IMD3005_3==IMD5000_`p' & IMD3005_3!=.
}
replace pid = pid_w5 if CSES5==1
recode IMD3005_3 (9999989/9999999=.), generate(partyID)

* CMP: total salience is the sum of economic and cultural salience.
generate sal_total = sal_eco + sal_cul

* Weights
rename (IMD1010_1-IMD1010_3) (wgt1 wgt2 wgt3)


******************************
*** Affective Polarization ***
******************************

* Respondent-level mean sympathy score and number of parties rated.
egen mn_symp = rowmean(sympA-sympI)
egen nm_symp = rownonmiss(sympA-sympI)

local parties A B C D E F G H I

* Affective Polarization measure (Wagner 2019)
* - unweighted: square root of the summed squared deviations from the
*   respondent's mean score, each divided by the number of parties rated.
foreach p of local parties {
	generate comp_`p' = (symp`p' - mn_symp)^2/nm_symp
}
egen comp = rowtotal(comp_*), missing
generate UAP = sqrt(comp)
drop comp*

* - weighted: deviations are taken from the size-weighted sum of scores and
*   each squared deviation is multiplied by the party's size.
foreach p of local parties {
	generate wgt_sy_`p' = symp`p'*size_`p'
}
egen wgt_sy = rowtotal(wgt_sy_*), missing
foreach p of local parties {
	generate comp_`p' = size_`p'*(symp`p'-wgt_sy)^2
}
egen comp = rowtotal(comp_*), missing
generate WAP = sqrt(comp)
drop comp*

* NoZ
* Flags for parties that received a score of 0, their count, and that count
* relative to the number of parties rated.
recode sympA-sympI (0=1) (nonmissing=0), generate(nosympA nosympB nosympC nosympD nosympE nosympF nosympG nosympH nosympI)
egen tot_nosymp = rowtotal(nosymp*)
generate rel_nosymp = tot_nosymp/nm_symp

* Affective Polarization Index (Reiljan 2019)
* For every country-year and every party slot A-I the loop below computes a
* party-level score (stored in AP_*) and that score multiplied by the party's
* mean size (stored in AP_comp_*). API is the row total of the AP_comp_*
* variables.
* NOTE(review): the in-party mean is taken over respondents whose pid equals
* the party, whereas the means it is compared with are taken over respondents
* whose voted equals the party. Confirm that mixing the two groupings is
* intended.
* Placeholders that the loop fills case by case:
gen AP_A=.
gen AP_B=.
gen AP_C=.
gen AP_D=.
gen AP_E=.
gen AP_F=.
gen AP_G=.
gen AP_H=.
gen AP_I=.
gen AP_comp_A=.
gen AP_comp_B=.
gen AP_comp_C=.
gen AP_comp_D=.
gen AP_comp_E=.
gen AP_comp_F=.
gen AP_comp_G=.
gen AP_comp_H=.
gen AP_comp_I=.

* Country-year identifier used as loop index.
egen case_API = group(country year)

* Highest case number, so that the outer loop visits every case.
sum case_API
local max = r(max)
capture drop comp_*
local vars = "A B C D E F G H I"
foreach n of numlist 1/`max' {
	display _newline ">>>> case_API `n'"
	foreach var1 of newlist `vars' {
		display _continue " `var1'"
		* mn1: mean score given to party var1 by respondents whose pid is var1.
		sum symp`var1' if pid=="`var1'" & case_API==`n'
		local mn1 = r(mean)
		foreach var2 of newlist `vars' {
			* mn2: mean score given to party var2 by respondents whose voted is var1.
			quietly sum symp`var2' if case_API==`n' & voted=="`var1'"
			local mn2 = r(mean)
			* si1 and si2: mean size of party var1 and of party var2 in this case.
			quietly sum size_`var1' if case_API==`n'
			local si1 = r(mean)
			quietly sum size_`var2' if case_API==`n'
			local si2 = r(mean)
			* The difference in means is weighted by the size of var2 relative
			* to the combined size of all parties other than var1. The result
			* is a constant, written to every row of the data.
			gen comp_`var2' = (`mn1' - `mn2') * (`si2' / (1-`si1'))
			}
		* Sum the nine components; with the missing option the total is
		* missing only when all components are missing.
		egen tmp = rowtotal(comp_A comp_B comp_C comp_D comp_E comp_F comp_G comp_H comp_I), missing
		sum comp_A comp_B comp_C comp_D comp_E comp_F comp_G comp_H comp_I
		* tmp is identical in every row, so its first-row value is stored.
		local AP_`var1' = tmp
		drop comp_*
		drop tmp
		*local AP_`var1' = `comp_A' + `comp_B' + `comp_B' + `comp_C' + `comp_D' + `comp_E' + `comp_F' + `comp_G' + `comp_H' + `comp_I'
		display `AP_`var1''
		* Store the party-level score for the rows of this case only.
		replace AP_`var1' = `AP_`var1'' if case_API==`n'
		 sum size_`var1' if case_API==`n'
		local partysize = r(mean)
		* Contribution of party var1 to the index: score times mean party size.
		 replace AP_comp_`var1' = `AP_`var1'' * `partysize' if case_API==`n'
		}
	}

* Index per respondent row (constant within a case) and its mean by country-year.
egen API = rowtotal(AP_comp_*), missing
mean API, over(country year)

********************************
*** Ideological polarization ***
********************************

* Among citizens
* Standard deviation of left-right self-placement within each country-year.
bysort case: egen sd_leftright_cit = sd(leftright)

* Among elites
* Number of parties the respondent placed on the left-right scale.
egen nm_LR = rownonmiss(LR_A-LR_I)

local parties A B C D E F G H I

* Size-weighted placements and their row total.
foreach p of local parties {
	generate wgt_LR_`p' = LR_`p'*size_`p'
}
egen wgt_mn_LR = rowtotal(wgt_LR_A-wgt_LR_I)

* Parties without a placement are put at the weighted total, so that their
* term in the sum below is zero.
foreach p of local parties {
	replace LR_`p' = wgt_mn_LR if LR_`p'==.
}

* Square root of the size-weighted squared distances to the weighted total.
generate ideopol = sqrt( ///
	( size_A*(LR_A - wgt_mn_LR)^2 ///
	+ size_B*(LR_B - wgt_mn_LR)^2 ///
	+ size_C*(LR_C - wgt_mn_LR)^2 ///
	+ size_D*(LR_D - wgt_mn_LR)^2 ///
	+ size_E*(LR_E - wgt_mn_LR)^2 ///
	+ size_F*(LR_F - wgt_mn_LR)^2 ///
	+ size_G*(LR_G - wgt_mn_LR)^2 ///
	+ size_H*(LR_H - wgt_mn_LR)^2 ///
	+ size_I*(LR_I - wgt_mn_LR)^2 ///
	))

* Discard the score when fewer than two parties were placed or when it is 0.
replace ideopol = . if ideopol==0 | nm_LR<2

* Mean perceived polarization within each country-year.
bysort case: egen mn_ideopol = mean(ideopol)

**********************
*** Social sorting ***
**********************

* Select viable parties
* partysize is the number of respondents sharing the same country, year and
* vote; it stays missing for respondents without a vote.
egen party = group(country year vote)
bys party: gen partysize = _N if vote!=.
replace vote=. if vote==9999999

* Aggregate: Cramer's V (main analysis)
* For each country-year, Cramer's V of the cross-tabulation of partyID with
* each social characteristic, stored in CC_ideo_<characteristic>.
capture drop CC_*
capture drop mn_CC min_CC prod_CC

sum case
local max = r(max)
local vars = "edu3 income reli_maj area"
foreach var of varlist `vars' {
	gen CC_ideo_`var' = .
	forvalues c = 1/`max' {
		* FIX: Stata treats a missing value as larger than any number, so the
		* bare condition partysize>25 also let through every respondent
		* without a vote. The explicit non-missing test keeps only
		* respondents of parties with more than 25 voters.
		tab partyID `var' if case==`c' & partysize>25 & !missing(partysize), V
		replace CC_ideo_`var' = r(CramersV) if case==`c'
	}
}
	
* Individual level: absolute residual (robustness)
* For every country-year and every vote option, an indicator for that vote is
* regressed on the social characteristics; the absolute residual of each
* respondent for the option actually chosen is stored in pred, and sorting is
* defined as 1 minus that value.
set matsize 11000
* vars holds the predictors as factor variables. vars2 and R2SES are created
* but not used anywhere in the code visible here.
local vars = "i.reli_maj i.area i.edu3 i.income"
local vars2 = "reli_maj area edu3 income"
gen pred = .
gen R2SES = .
* This regression only defines the estimation sample. Every observation
* outside e(sample) is dropped from the data for good, which also affects
* everything that follows in this file.
regress API `vars' i.vote i.country i.year
gen keep = 1 if e(sample)
drop if keep!=1
drop keep
* Renumber the country-years that are left after the drop.
egen cases = group(country year)
sum cases, detail
local max = r(max)
forvalues c = 1/`max' {
	* Number the vote options in this case and build one indicator per option
	* (vt_1, vt_2, ...).
	egen cases_pt = group(vote) if cases == `c'
	sum cases_pt if cases == `c'
	local max_pt = r(max)
	tab vote if cases==`c', gen(vt_)
	forvalues p = 1/`max_pt' {
		* Linear regression of the indicator on the social characteristics;
		* the absolute residual is kept for respondents who chose option p.
		regress vt_`p' `vars' if cases == `c'
		predict pred_tmp, resid
		replace pred = abs(pred_tmp) if vt_`p'==1 & cases==`c'
		drop pred_tmp 
		}
	* Remove the helpers so that the next case can create them afresh.
	drop cases_pt
	drop vt_*
	}
gen sorting = 1 - pred

* Rounds and observations
* round: number of the CSES module the respondent is flagged for; when more
* than one flag is set the highest module number wins.
generate round = .
forvalues r = 1/5 {
	replace round = `r' if CSES`r'==1
}

* Number of parties with a non-missing sympathy score.
egen obs_symp = rownonmiss(sympA-sympI)

	
**************
*** Labels ***
**************

label variable WAP "Affective polarization (WAP)"
label variable sd_leftright2 "Ideological polarization (parties)"
label variable sd_leftright_cit "Ideological polarization (voters)"
label variable partisan "Partisan"
label variable male "Male"
label variable year "Year"

label variable ideopol "Perceived ideological polarization"
label variable sal_cul "Salience of cultural issues"
label variable sal_eco "Salience of economic issues"
label variable mn_ideopol "Mean ideological polarization"
label variable mn_PRR "Support for PRR"
label variable ENOP "Effective number of parties"

*******************
*** Center vars ***
*******************

* Grand-mean centering: c_<name> is the variable minus its overall mean.
foreach v of varlist WAP sal_cul sal_eco mn_ideopol mn_PRR ENOP {
	summarize `v'
	local avg = r(mean)
	generate c_`v' = `v' - `avg'
}

	
* FIX: the bare word "pauze" stood here. It is not a Stata command, so unless
* a user-written program of that name is installed, a top-to-bottom run of
* this file stops at this point with an "unrecognized command" error and
* never reaches the analysis section. It is disabled so that the file runs
* through; re-enable it to halt here on purpose.
* pauze
	
****************
*** Analysis ***
****************

* Aggregate data
* Collapse the respondent-level file to one row per country-year (case).
keep if year>=1996 & case!=. // reduce file size
capture drop id
gen id = _n
* Means of the measures, first non-missing country identifiers, number of
* respondents (count of id), and the maximum of obs_symp (stored as
* n_parties) and of round.
collapse (mean) WAP* UAP* API CSES5 region7 sal_cul sal_eco sd_leftright_cit sd_leftright2 ENOP mn_PRR CC_* mn_ideopol ideopol partisan year (firstnm) IMD1004 country (count) id (max) n_parties=obs_symp round, by(case)
duplicates drop country year, force
* FIX: number the waves in chronological order. Without the sort on year the
* within-country order, and with it the wave number, is not defined; wave is
* the time variable of the panel and of the first-difference model later on.
bys country (year): gen wave = _n
tsset country wave

* Social sorting scores
* Row mean and row maximum of the four Cramer's V variables.
egen prod_CC2a = rowmean(CC_ideo_edu3 CC_ideo_income CC_ideo_area CC_ideo_reli) // mean PRODCC (main variable)
egen prod_CC2b = rowmax(CC_ideo_edu3 CC_ideo_income CC_ideo_area CC_ideo_reli) // max PRODCC (robustness)

* Center variables
* For every listed variable: country mean and standard deviation (mean_, sd_),
* deviation from the country mean (c_), that deviation divided by the country
* standard deviation (std_), and the overall standardized score (std2_).
local tocenter "WAP API CC* prod_CC2* sal_cul sal_eco ideopol sd_leftright_cit sd_leftright2 n_parties ENOP mn_PRR"
foreach v of varlist `tocenter' {
	bysort country: egen mean_`v' = mean(`v')
	bysort country: egen sd_`v' = sd(`v')
	generate c_`v' = `v' - mean_`v'
	generate std_`v' = (`v' - mean_`v') / sd_`v'
	egen std2_`v' = std(`v')
}
	
* Labels
* Within-country standardized variables (std_) and year
label variable std_API "Affective Polarization Index"
label variable year "Year"
label variable std_ENOP "Effective number of parties"
label variable std_sd_leftright_cit "Ideological polarization (voters)"
label variable std_ideopol "Ideological polarization (voters)"
label variable std_sd_leftright2 "Ideological polarization (parties)"
label variable std_sal_cul "Salience of cultural issues"
label variable std_sal_eco "Salience of economic issues"

* Overall standardized variables (std2_)
label variable std2_prod_CC2a "Social sorting (mean Cramer)"
label variable std2_prod_CC2b "Social sorting (max Cramer)"
label variable std2_API "Affective Polarization Index"
label variable std2_ENOP "Effective number of parties"
label variable std2_n_parties "Number of parties"
label variable std2_sd_leftright_cit "Ideological polarization (voters)"
label variable std2_ideopol "Ideological polarization (voters)"
label variable std2_sd_leftright2 "Ideological polarization (parties)"
label variable std2_sal_cul "Salience of cultural issues"
label variable std2_sal_eco "Salience of economic issues"
label variable std2_CC_ideo_edu3 "Education"
label variable std2_CC_ideo_income "Income"
label variable std2_CC_ideo_area "Region"
label variable std2_CC_ideo_reli_maj "Religion"

* Attach the region value labels to the collapsed region variable again.
label values region7 region7


* Descriptives (using aggregated data)
* ------------------------------------

* Appendix A, Table 2
* FIX 1: prod_CC2 matches both prod_CC2a and prod_CC2b and is rejected as an
* ambiguous abbreviation; the main variable prod_CC2a is now named in full.
* FIX 2: "if e(sample)" needs an estimation on the collapsed data, and none
* has been run at this point. The main fixed-effects model is therefore
* estimated quietly first, so that the table covers its estimation sample.
quietly xtreg std2_API c.std2_prod_CC2a std2_ideopol std2_sd_leftright2 std2_sal_cul std2_sal_eco std2_n_parties i.round, fe level(95)
xtsum API prod_CC2a ideopol sd_leftright2 sal_cul sal_eco year if e(sample)

* Appendix B, Figure 1 and 2
* Trends per country, using election year as the time variable.
xtset country year
labdu country, delete report
lab values country country
* Keep only the country-years that have all three plotted measures.
drop if std2_API==. | std2_ideopol==. | std2_prod_CC2a==.
xtline std2_API std2_ideopol std2_prod_CC2a , scheme(cleanplots) ///
	byopts(cols(6) note("")) ysize(8) xtitle("") ///
	legend(size(tiny) order(1 "Affective polarization" 2 "Ideological polarization (voters)" 3 "Social sorting"))
graph export "Appendix - trends by country.tif", width(2000) replace

* FIX 3: the first plotted series is CC_ideo_edu3, the education score, but
* its legend entry read "Sorting with ideology".
xtset country year
xtline CC_ideo_edu3 CC_ideo_income CC_ideo_area CC_ideo_reli_maj, scheme(538w) ///
	byopts(cols(6) note("")) ysize(8) xtitle("") ///
	legend(size(tiny) order(1 "Sorting with education" 2 "Sorting with income" 3 "Sorting with region" 4 "Sorting with religion"))
graph export "Appendix - trends by country (sorting).tif", width(2000) replace

* Regressions
* -----------

* Main model
* The same specification is estimated twice: with fixed effects (within
* countries) and with the between estimator (between countries).
local rhs c.std2_prod_CC2a std2_ideopol std2_sd_leftright2 std2_sal_cul std2_sal_eco std2_n_parties i.round
foreach est in fe be {
	xtreg std2_API `rhs', `est' level(95)
	estimates store `=upper("`est'")'
}

* Figure 1: coefficients of both models, without constant and round dummies.
coefplot FE BE, drop(_cons 2.round 3.round 4.round 5.round) scheme(cleanplots) xline(0) ///
	legend(order(2 "Within countries (FE)" 4 "Between countries (BE)")) ///
	rename(std2_prod_CC2a="Social sorting") ///
	name(gr1, replace) title("")
graph export "Figure 1.tif", replace width(4000)

* Regression table of both models.
estout FE BE using "Main models CSES.xls", stats(N N_g) cells(b(star fmt(3)) se(par(`"="("' `")""') fmt(3))) starlevels(+ 0.10 * 0.05 ** 0.01 *** 0.001)  varlabels(_cons Intercept) noomit nobase label varwidth(40) replace

* Robustness
* ----------

* Robustness: alternative operationalizations of social sorting
* The fixed-effects model is re-estimated once per sorting measure, with the
* same controls each time; the results are stored as m1 ... m6.
local controls "std2_n_parties std2_ideopol std2_sd_leftright2 std2_sal_cul std2_sal_eco i.round"
local k = 0
foreach sortvar in std2_prod_CC2a std2_prod_CC2b std2_CC_ideo_edu3 std2_CC_ideo_income std2_CC_ideo_area std2_CC_ideo_reli {
	local ++k
	xtreg std2_API `sortvar' `controls', fe level(95)
	estimates store m`k'
}

* Figure 2: only the sorting coefficients are shown. No rename() option is
* passed: in the earlier layout the text
* rename(std2_prod_CC2a="Mean overlap" std2_prod_CC2b="Max overlap")
* followed a line-continuation marker and was therefore comment text.
coefplot m1 m2 m3 m4 m5 m6, drop(_cons `controls' 2.round 3.round 4.round 5.round) ///
	scheme(cleanplots) xline(0) name(gr1, replace) title("") ///
	heading(std2_prod_CC2a="{bf:Aggregate social sorting scores}" std2_CC_ideo_edu3="{bf:Individual social sorting scores}") ///
	legend(off)
graph export "Figure 2.tif", replace width(4000)

* Regression table of the six models.
estout m1 m2 m3 m4 m5 m6 using "Main models CSES - different DVs.xls", stats(N N_g) cells(b(star fmt(3)) se(par(`"="("' `")""') fmt(3))) starlevels(+ 0.10 * 0.05 ** 0.01 *** 0.001)  varlabels(_cons Intercept) noomit nobase label varwidth(40) replace

* Robustness: FD
* First differences between consecutive waves of the same country; the table
* is written from the model that has just been estimated.
xtset country wave
xtreg d.std2_API d.std2_prod_CC2a d.n_parties d.ideopol d.sd_leftright2 d.sal_cul d.sal_eco
estout using "Robustness FD.xls", stats(N N_g) cells(b(star fmt(3)) se(par(`"="("' `")""') fmt(3))) starlevels(+ 0.10 * 0.05 ** 0.01 *** 0.001)  varlabels(_cons Intercept) noomit nobase label varwidth(40) replace

* Robustness: jackknife
* Fixed-effects and between models, re-estimated with a jackknife that leaves
* out one country at a time. The stored results replace any earlier FE and BE.
local spec std2_API c.std2_prod_CC2a std2_n_parties std2_ideopol std2_sd_leftright2 std2_sal_cul std2_sal_eco i.round
foreach est in fe be {
	jackknife, cluster(country): xtreg `spec', `est' level(95)
	estimates store `=upper("`est'")'
}
coefplot FE BE, drop(_cons) scheme(plotplainblind) xline(0) legend(order(2 "Within countries (FE)" 4 "Between countries (BE)")) name(gr1, replace) title("")
estout FE BE using "Robustness jackknife.xls", stats(N N_g) cells(b(star fmt(3)) se(par(`"="("' `")""') fmt(3))) starlevels(+ 0.10 * 0.05 ** 0.01 *** 0.001)  varlabels(_cons Intercept) noomit nobase label varwidth(40) replace
